packages <- c(
"CIMseq", "CIMseq.data", "printr", "ggthemes", "tidyverse"
)
purrr::walk(packages, library, character.only = TRUE)
rm(packages)
##DATA
load('../data/CIMseqData.rda')
load('../data/sObj.rda')
#rename classes
renameClasses <- function(class) {
case_when(
class == "0" ~ "C.Lgr5.proximal.1",
class == "1" ~ "SI.Stem",
class == "2" ~ "C.TA.proximal.1",
class == "3" ~ "C.Lgr5.proximal.2",
class == "4" ~ "C.Lgr5.distal",
class == "5" ~ "SI.Lgr5.Mki67",
class == "6" ~ "C.Goblet.distal.1",
class == "7" ~ "SI.TA.1",
class == "8" ~ "C.TA.proximal.2",
class == "9" ~ "C.Goblet.distal.2",
class == "10" ~ "SI.TA.2",
class == "11" ~ "C.Goblet.proximal.1",
class == "12" ~ "SI.Goblet",
class == "13" ~ "SI.Enterocytes",
class == "14" ~ "C.Colonocytes.distal",
class == "15" ~ "C.Goblet.distal.3",
class == "16" ~ "C.Lgr5.Mki67.proximal.1",
class == "17" ~ "C.TA.distal",
class == "18" ~ "C.Lgr5.Mki67.proximal.2",
class == "19" ~ "C.Colonocyte.proximal.1",
class == "20" ~ "Tufft",
class == "21" ~ "C.Colonocyte.proximal.2",
class == "22" ~ "C.Goblet.proximal.2",
class == "23" ~ "Enteroendocrine",
class == "24" ~ "C.Goblet.distal.4",
class == "25" ~ "SI.Paneth",
TRUE ~ "error"
)
}
c.order <- c(
"SI.Goblet", "SI.Paneth", "SI.Stem", "SI.Lgr5.Mki67", "SI.TA.1", "SI.TA.2", "SI.Enterocytes",
"C.Goblet.proximal.1", "C.Goblet.proximal.2", "C.Lgr5.proximal.1", "C.Lgr5.proximal.2", "C.Lgr5.Mki67.proximal.1", "C.Lgr5.Mki67.proximal.2", "C.TA.proximal.1", "C.TA.proximal.2", "C.Colonocyte.proximal.1", "C.Colonocyte.proximal.2",
"C.Goblet.distal.1", "C.Goblet.distal.2", "C.Goblet.distal.3", "C.Goblet.distal.4", "C.Lgr5.distal", "C.TA.distal", "C.Colonocytes.distal",
"Tufft", "Enteroendocrine"
)
getData(cObjSng, "classification") <- renameClasses(getData(cObjSng, "classification"))
fractions <- getData(sObj, "fractions")
colnames(fractions) <- renameClasses(colnames(fractions))
sObj@fractions <- fractions
plotUnsupervisedClass(cObjSng, cObjMul)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
plotUnsupervisedMarkers(
cObjSng, cObjMul,
c("Lgr5", "Ptprc", "Chga", "Dclk1", "Slc26a3", "Atoh1", "Alpi", "Lyz1"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
plotUnsupervisedMarkers(
cObjSng, cObjMul, c("Mki67"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
plotUnsupervisedMarkers(
cObjSng, cObjMul, c("Plet1"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
plotUnsupervisedMarkers(
cObjSng, cObjMul, c("Hoxb13"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
plotUnsupervisedClass(cObjSng, cObjMul) %>%
plotData() %>%
inner_join(MGA.Meta, by = c("Sample" = "sample")) %>%
ggplot() +
geom_point(aes(`dim.red dim 1`, `dim.red dim 2`, colour = unique_key)) +
ggthemes::theme_few() +
scale_colour_manual(values = col40())
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
plotUnsupervisedClass(cObjSng, cObjMul) %>%
plotData() %>%
inner_join(MGA.Meta, by = c("Sample" = "sample")) %>%
mutate(subject_id = as.character(subject_id)) %>%
ggplot() +
geom_point(aes(`dim.red dim 1`, `dim.red dim 2`, colour = subject_id), alpha = 0.3) +
ggthemes::theme_few() +
scale_colour_manual(values = col40())
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
plotUnsupervisedClass(cObjSng, cObjMul) %>%
plotData() %>%
inner_join(MGA.Meta, by = c("Sample" = "sample")) %>%
mutate(subject_age = as.character(subject_age)) %>%
ggplot() +
geom_point(aes(`dim.red dim 1`, `dim.red dim 2`, colour = subject_age)) +
ggthemes::theme_few() +
scale_colour_manual(values = col40())
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
tibble(
sample = colnames(getData(cObjSng, "counts")),
class = getData(cObjSng, "classification")
) %>%
mutate(source = if_else(
str_detect(sample, "SRR654") | str_detect(sample, "SRR510"),
"External", "Enge"
)) %>%
group_by(source, class) %>%
summarize(n = n()) %>%
ungroup() %>%
group_by(source) %>%
mutate(`%` = n / sum(n) * 100) %>%
ggplot() +
geom_bar(aes(source, `%`, fill = source), stat = "identity", position = position_dodge(width = 1)) +
facet_wrap(~class, scales = "free") +
labs(x = "Source", y = "% of dataset") +
theme_bw() +
theme(
legend.position = "top",
axis.title.x = element_blank()
) +
guides(fill = FALSE)
getData(cObjSng, "dim.red") %>%
matrix_to_tibble("sample") %>%
setNames(c("sample", "UMAP.dim1", "UMAP.dim2")) %>%
mutate(source = case_when(
str_detect(sample, "SRR654") ~ "Tabula Muris",
str_detect(sample, "SRR510") ~ "Regev",
TRUE ~ "Enge"
)) %>%
sample_n(nrow(.), FALSE) %>%
ggplot() +
geom_point(aes(UMAP.dim1, UMAP.dim2, colour = source), alpha = 0.75) +
theme_few()
averageGroupExpression <- function(exp, classes) {
c <- unique(classes)
means <- purrr::map_dfc(c, function(x) {
data.frame(rowMeans(exp[, classes == x]))
})
means <- as.matrix(means)
colnames(means) <- c
return(means)
}
av.exp <- averageGroupExpression(
getData(cObjSng, "counts.cpm")[getData(cObjMul, "features"), ],
getData(cObjSng, "classification")
)
p.cor <- cor(av.exp, method = "pearson")
p.cor %>%
matrix_to_tibble("to") %>%
gather(from, cor, -to) %>%
mutate(
from = parse_factor(from, levels = c.order),
to = parse_factor(to, levels = c.order)
) %>%
ggplot() +
geom_tile(aes(from, to, fill = cor)) +
scale_fill_viridis_c() +
theme_few() +
theme(
axis.text.x = element_text(angle = 90),
axis.title = element_blank()
) +
guides(fill = guide_colorbar(title = "Pearson's\ncorrelation"))
There are 2137 multiplets in the analysis.
adj <- adjustFractions(cObjSng.enge, cObjMul, sObj, theoretical.max = 4)
nConnections <- rowSums(adj)
table(nConnections) / length(nConnections) * 100
| 0 | 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|---|
| 5.568554 | 31.44595 | 39.77539 | 17.45438 | 4.960225 | 0.7955077 |
tibble(
sample = names(nConnections),
connections = nConnections
) %>%
inner_join(MGA.Meta, by = "sample") %>%
count(sub_tissue, connections)
| sub_tissue | connections | n |
|---|---|---|
| colon | 0 | 111 |
| colon | 1 | 556 |
| colon | 2 | 625 |
| colon | 3 | 297 |
| colon | 4 | 98 |
| colon | 5 | 16 |
| small_intestine | 0 | 8 |
| small_intestine | 1 | 116 |
| small_intestine | 2 | 225 |
| small_intestine | 3 | 76 |
| small_intestine | 4 | 8 |
| small_intestine | 5 | 1 |
fractions <- getData(sObj, "fractions")
tibble(fractions = c(fractions)) %>%
ggplot() +
geom_histogram(aes(fractions), binwidth = 0.01) +
theme_bw()
costs <- getData(sObj, "costs")
tibble(
nCellTypes = nConnections,
cost = costs
) %>%
ggplot() +
geom_boxplot(aes(nCellTypes, cost, group = nCellTypes)) +
scale_x_continuous(name = "Detected cell types", breaks = 0:max(nConnections)) +
theme_bw()
estimatedCells <- estimateCells(cObjSng.enge, cObjMul, theoretical.max = 4)
tibble(
sample = names(costs),
cost = unname(costs)
) %>%
inner_join(
select(estimatedCells, sample, estimatedCellNumber),
by = "sample"
) %>%
mutate(estimatedCellNumber = round(estimatedCellNumber)) %>%
ggplot() +
geom_boxplot(aes(estimatedCellNumber, cost, group = estimatedCellNumber)) +
scale_x_continuous(
name = "ERCC estimated cell number"
#breaks = 0:max(round(pull(estimateCells(cObjSng, cObjMul), estimatedCellNumber)))
) +
theme_bw()
tibble(
sample = names(nConnections),
detectedConnections = nConnections
) %>%
inner_join(estimatedCells) %>%
mutate(estimatedCellNumber = round(estimatedCellNumber)) %>%
ggplot() +
geom_boxplot(aes(estimatedCellNumber, detectedConnections, group = estimatedCellNumber)) +
scale_x_continuous(
name = "ERCC estimated cell number",
breaks = 0:max(round(pull(filter(estimatedCells, sampleType == "Multiplet"), estimatedCellNumber)))
) +
scale_y_continuous(
name = "Detected cell number",
breaks = 0:max(round(nConnections))
) +
theme_bw()
## Joining, by = "sample"
tibble(
sample = names(nConnections),
detectedConnections = nConnections
) %>%
inner_join(tibble(
sample = colnames(getData(cObjMul, "counts")),
total.counts = colSums(getData(cObjMul, "counts"))
), by = "sample") %>%
ggplot() +
geom_boxplot(aes(detectedConnections, total.counts, group = detectedConnections)) +
scale_x_continuous(
name = "Detected cell number",
breaks = 0:max(nConnections)
) +
scale_y_continuous(name = "Total counts") +
theme_bw()
tibble(
sample = names(nConnections),
detectedConnections = nConnections
) %>%
inner_join(tibble(
sample = colnames(getData(cObjMul, "counts")),
total.ercc = colSums(getData(cObjMul, "counts.ercc"))
), by = "sample") %>%
ggplot() +
geom_boxplot(aes(detectedConnections, total.ercc, group = detectedConnections)) +
scale_x_continuous(
name = "Detected cell number",
breaks = 0:max(nConnections)
) +
scale_y_continuous(name = "Total ERCC counts") +
theme_bw()
plotSwarmCircos(sObj, cObjSng.enge, cObjMul, classOrder = c.order)
Only detected duplicates and triplicates.
ERCC estimated cell number max = 4.
Weight cutoff = 10.
adj <- adjustFractions(cObjSng.enge, cObjMul, sObj, binary = TRUE)
samples <- rownames(adj)
rs <- rowSums(adj)
keep <- rs == 2 | rs == 3 | rs == 4
plotSwarmCircos(
filterSwarm(sObj, keep), cObjSng.enge, cObjMul, weightCut = 10,
classOrder = c.order, theoretical.max = 4)